# load data.table for faster operations
library(data.table)
library(tidyverse)
# use fread for fast reading of data csv files
  # ColumnNameCleaner.rmd should be run first
cases <- fread("Data/time_series_covid19_confirmed_US.csv")
vaccineCounty <- fread("Data/Vaccination/COVID-19_Vaccinations_in_the_United_States_County.csv")
#vaccineJurisdiction <- fread("Data/Vaccination/COVID-19_Vaccinations_in_the_United_States_Jurisdiction.csv")
pop <- fread("Data/Population/co-est2019-alldata.csv")
distancing <- fread("Data/Distancing/Unacast_Social_Distancing_Grades.csv")
masking <- fread("Data/Masking/mask-use-by-county.csv")
# preview the data tables
head(cases)
head(vaccineCounty)
#head(vaccineJurisdiction)
head(distancing)
head(masking)
head(pop)
# either create or fix the FIPS column using available data
cases[, FIPS := sprintf("%05d", cases[,FIPS])]
pop[, FIPS := sprintf("%02d%03d", pop[,STATE], pop[,COUNTY])]
# create a population density column too
distancing[, c("FIPS", "popDensity") := .(sprintf("%05d", county_fips), ((county_population / Shape__Area * 100) + 1))]
masking[, FIPS := sprintf("%05d", COUNTYFP)]
masking <- cbind("FIPS" = masking[, FIPS], masking[,!c("FIPS")] + 1)
# drops unneeded columns
cases <- cases[, !c("V1", "UID", "iso2", "iso3", "code3", "CombinedKey")]
# selects the most recent population estimate (2019) and drops what will be redundant columns
years2019 <- grep("^([^0-9]*)$|2019", colnames(pop))
pop <- pop[, ..years2019][, !c("SUMLEV", "STATE", "COUNTY", "STNAME", "CTYNAME")]
# drops what will be redundant columns
distancing <- distancing[, !c("OBJECTID", "state_fips", "state_name", "county_fips", "county_name")]
masking <- masking[, !c("COUNTYFP")]
totalCases <- grep("[0-9]{4}", colnames(cases))
cases[, TotalCases := rowSums(cases[, ..totalCases], na.rm = TRUE)]
Both 'totalCases' and '..totalCases' exist in calling scope. Please remove the '..totalCases' variable in calling scope for clarity.
grades <- c("A+"=4.3,"A"=4,"A-"=3.7,"B+"=3.3,"B"=3,"B-"=2.7, "C+"=2.3,"C"=2,"C-"=1.7,"D+"=1.3,"D"=1,"D-"= 0.7, "F"=0)

gfunc <- function(x) {
  grades[as.character(x)]
}

distancing[, c("grade_total", "grade_distance", "grade_visitation", "grade_encounters") :=
             .(gfunc(grade_total), gfunc(grade_distance), gfunc(grade_visitation), gfunc(grade_encounters))]
# set the FIPS column as the key for faster data.table operations
# time series
setkey(cases, FIPS)
setkey(vaccineCounty, FIPS)
setkey(distancing, FIPS)
# not time series
setkey(masking, FIPS)
setkey(pop, FIPS)
pivot_wider(vaccineCounty, names_from = Date, values_from = )
csv <- cases[pop,][distancing,][masking,]
write.csv(csv, "Data/casesPopDistancingMasking.csv", row.names = FALSE)
openxlsx::write.xlsx(csv, "Data/casesPopDistancingMasking.xlsx", overwrite = TRUE)
dates <- grep("D[0-9]{4}_[0-9]{2}_[0-9]{2}", colnames(csv))
openxlsx::write.xlsx(csv[, !..dates], "Data/noDates.xlsx", overwrite = TRUE)
csv
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCiMgbG9hZCBkYXRhLnRhYmxlIGZvciBmYXN0ZXIgb3BlcmF0aW9ucw0KbGlicmFyeShkYXRhLnRhYmxlKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpgYGANCg0KDQpgYGB7cn0NCiMgdXNlIGZyZWFkIGZvciBmYXN0IHJlYWRpbmcgb2YgZGF0YSBjc3YgZmlsZXMNCiAgIyBDb2x1bW5OYW1lQ2xlYW5lci5ybWQgc2hvdWxkIGJlIHJ1biBmaXJzdA0KY2FzZXMgPC0gZnJlYWQoIkRhdGEvdGltZV9zZXJpZXNfY292aWQxOV9jb25maXJtZWRfVVMuY3N2IikNCnZhY2NpbmVDb3VudHkgPC0gZnJlYWQoIkRhdGEvVmFjY2luYXRpb24vQ09WSUQtMTlfVmFjY2luYXRpb25zX2luX3RoZV9Vbml0ZWRfU3RhdGVzX0NvdW50eS5jc3YiKQ0KI3ZhY2NpbmVKdXJpc2RpY3Rpb24gPC0gZnJlYWQoIkRhdGEvVmFjY2luYXRpb24vQ09WSUQtMTlfVmFjY2luYXRpb25zX2luX3RoZV9Vbml0ZWRfU3RhdGVzX0p1cmlzZGljdGlvbi5jc3YiKQ0KcG9wIDwtIGZyZWFkKCJEYXRhL1BvcHVsYXRpb24vY28tZXN0MjAxOS1hbGxkYXRhLmNzdiIpDQpkaXN0YW5jaW5nIDwtIGZyZWFkKCJEYXRhL0Rpc3RhbmNpbmcvVW5hY2FzdF9Tb2NpYWxfRGlzdGFuY2luZ19HcmFkZXMuY3N2IikNCm1hc2tpbmcgPC0gZnJlYWQoIkRhdGEvTWFza2luZy9tYXNrLXVzZS1ieS1jb3VudHkuY3N2IikNCmBgYA0KDQoNCmBgYHtyfQ0KIyBwcmV2aWV3IHRoZSBkYXRhIHRhYmxlcw0KaGVhZChjYXNlcykNCmhlYWQodmFjY2luZUNvdW50eSkNCiNoZWFkKHZhY2NpbmVKdXJpc2RpY3Rpb24pDQpoZWFkKGRpc3RhbmNpbmcpDQpoZWFkKG1hc2tpbmcpDQpoZWFkKHBvcCkNCmBgYA0KDQoNCmBgYHtyfQ0KIyBlaXRoZXIgY3JlYXRlIG9yIGZpeCB0aGUgRklQUyBjb2x1bW4gdXNpbmcgYXZhaWxhYmxlIGRhdGENCmNhc2VzWywgRklQUyA6PSBzcHJpbnRmKCIlMDVkIiwgY2FzZXNbLEZJUFNdKV0NCnBvcFssIEZJUFMgOj0gc3ByaW50ZigiJTAyZCUwM2QiLCBwb3BbLFNUQVRFXSwgcG9wWyxDT1VOVFldKV0NCiMgY3JlYXRlIGEgcG9wdWxhdGlvbiBkZW5zaXR5IGNvbHVtbiB0b28NCmRpc3RhbmNpbmdbLCBjKCJGSVBTIiwgInBvcERlbnNpdHkiKSA6PSAuKHNwcmludGYoIiUwNWQiLCBjb3VudHlfZmlwcyksICgoY291bnR5X3BvcHVsYXRpb24gLyBTaGFwZV9fQXJlYSAqIDEwMCkgKyAxKSldDQptYXNraW5nWywgRklQUyA6PSBzcHJpbnRmKCIlMDVkIiwgQ09VTlRZRlApXQ0KbWFza2luZyA8LSBjYmluZCgiRklQUyIgPSBtYXNraW5nWywgRklQU10sIG1hc2tpbmdbLCFjKCJGSVBTIildICsgMSkNCmBgYA0KDQoNCmBgYHtyfQ0KIyBkcm9wcyB1bm5lZWRlZCBjb2x1bW5zDQpjYXNlcyA8LSBjYXNlc1ssICFjKCJWMSIsICJVSUQiLCAiaXNvMiIsICJpc28zIiwgImNvZGUzIiwgIkNvbWJpbmVkS2V5IildDQojIHNlbGVjdHMgdGhlIG1vc3QgcmVjZW50IHBvcHVsYXRpb24gZXN0aW1hdGUgKDIwMTkpIGFuZCBkcm9wcyB3aGF0IHdpbGwgYmUgcmVkdW5kYW50IGNvbHVtbnMNCnllYXJzMjAxOSA8LSBncmVwKCJeKFteMC05XSopJHwyMDE5IiwgY29sbmFtZXMocG9wKSkNCnBvcCA8LSBwb3BbLCAuLnllYXJzMjAxOV1bLCAhYygiU1VNTEVWIiwgIlNUQVRFIiwgIkNPVU5UWSIsICJTVE5BTUUiLCAiQ1RZTkFNRSIpXQ0KIyBkcm9wcyB3aGF0IHdpbGwgYmUgcmVkdW5kYW50IGNvbHVtbnMNCmRpc3RhbmNpbmcgPC0gZGlzdGFuY2luZ1ssICFjKCJPQkpFQ1RJRCIsICJzdGF0ZV9maXBzIiwgInN0YXRlX25hbWUiLCAiY291bnR5X2ZpcHMiLCAiY291bnR5X25hbWUiKV0NCm1hc2tpbmcgPC0gbWFza2luZ1ssICFjKCJDT1VOVFlGUCIpXQ0KYGBgDQoNCg0KYGBge3J9DQp0b3RhbENhc2VzIDwtIGdyZXAoIlswLTldezR9IiwgY29sbmFtZXMoY2FzZXMpKQ0KIyBPdmVyYWxsIHRvdGFsDQpjYXNlc1ssIFRvdGFsQ2FzZXMgOj0gcm93U3VtcyhjYXNlc1ssIC4udG90YWxDYXNlc10sIG5hLnJtID0gVFJVRSldDQojIFRvdGFsIGJlZm9yZSB2YWNjaW5hdGlvbnMNCmNhc2VzWywgVG90YWxDYXNlcyA6PSByb3dTdW1zKGNhc2VzWywgLi50b3RhbENhc2VzXSwgbmEucm0gPSBUUlVFKV0NCmBgYA0KDQoNCmBgYHtyfQ0KZ3JhZGVzIDwtIGMoIkErIj00LjMsIkEiPTQsIkEtIj0zLjcsIkIrIj0zLjMsIkIiPTMsIkItIj0yLjcsICJDKyI9Mi4zLCJDIj0yLCJDLSI9MS43LCJEKyI9MS4zLCJEIj0xLCJELSI9IDAuNywgIkYiPTApDQoNCmdmdW5jIDwtIGZ1bmN0aW9uKHgpIHsNCiAgZ3JhZGVzW2FzLmNoYXJhY3Rlcih4KV0NCn0NCg0KZGlzdGFuY2luZ1ssIGMoImdyYWRlX3RvdGFsIiwgImdyYWRlX2Rpc3RhbmNlIiwgImdyYWRlX3Zpc2l0YXRpb24iLCAiZ3JhZGVfZW5jb3VudGVycyIpIDo9DQogICAgICAgICAgICAgLihnZnVuYyhncmFkZV90b3RhbCksIGdmdW5jKGdyYWRlX2Rpc3RhbmNlKSwgZ2Z1bmMoZ3JhZGVfdmlzaXRhdGlvbiksIGdmdW5jKGdyYWRlX2VuY291bnRlcnMpKV0NCmBgYA0KDQoNCmBgYHtyfQ0KIyBzZXQgdGhlIEZJUFMgY29sdW1uIGFzIHRoZSBrZXkgZm9yIGZhc3RlciBkYXRhLnRhYmxlIG9wZXJhdGlvbnMNCiMgdGltZSBzZXJpZXMNCnNldGtleShjYXNlcywgRklQUykNCnNldGtleSh2YWNjaW5lQ291bnR5LCBGSVBTKQ0Kc2V0a2V5KGRpc3RhbmNpbmcsIEZJUFMpDQojIG5vdCB0aW1lIHNlcmllcw0Kc2V0a2V5KG1hc2tpbmcsIEZJUFMpDQpzZXRrZXkocG9wLCBGSVBTKQ0KYGBgDQoNCg0KYGBge3J9DQojIHBpdm90X3dpZGVyKHZhY2NpbmVDb3VudHksIG5hbWVzX2Zyb20gPSBEYXRlLCB2YWx1ZXNfZnJvbSA9ICkNCmBgYA0KDQoNCmBgYHtyfQ0KY3N2IDwtIGNhc2VzW3BvcCxdW2Rpc3RhbmNpbmcsXVttYXNraW5nLF0NCndyaXRlLmNzdihjc3YsICJEYXRhL2Nhc2VzUG9wRGlzdGFuY2luZ01hc2tpbmcuY3N2Iiwgcm93Lm5hbWVzID0gRkFMU0UpDQpvcGVueGxzeDo6d3JpdGUueGxzeChjc3YsICJEYXRhL2Nhc2VzUG9wRGlzdGFuY2luZ01hc2tpbmcueGxzeCIsIG92ZXJ3cml0ZSA9IFRSVUUpDQpkYXRlcyA8LSBncmVwKCJEWzAtOV17NH1fWzAtOV17Mn1fWzAtOV17Mn0iLCBjb2xuYW1lcyhjc3YpKQ0Kb3Blbnhsc3g6OndyaXRlLnhsc3goY3N2WywgIS4uZGF0ZXNdLCAiRGF0YS9ub0RhdGVzLnhsc3giLCBvdmVyd3JpdGUgPSBUUlVFKQ0KY3N2DQpgYGANCg0K